In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.linear_model import LinearRegression
In [18]:
Emp=pd.read_csv('Employee_perfromance_data.csv')
In [19]:
Emp.head()
Out[19]:
| Employee_ID | Department | Gender | Age | Job_Title | Hire_Date | Years_At_Company | Education_Level | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | Resigned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | IT | Male | 55 | Specialist | 03:05.6 | 2 | High School | 5 | 6750 | 33 | 32 | 22 | 2 | 0 | 14 | 66 | 0 | 2.63 | False |
| 1 | 2 | Finance | Male | 29 | Developer | 03:05.6 | 0 | High School | 5 | 7500 | 34 | 34 | 13 | 14 | 100 | 12 | 61 | 2 | 1.72 | False |
| 2 | 3 | Finance | Male | 55 | Specialist | 03:05.6 | 8 | High School | 3 | 5850 | 37 | 27 | 6 | 3 | 50 | 10 | 1 | 0 | 3.17 | False |
| 3 | 4 | Customer Support | Female | 48 | Analyst | 03:05.6 | 7 | Bachelor | 2 | 4800 | 52 | 10 | 28 | 12 | 100 | 10 | 0 | 1 | 1.86 | False |
| 4 | 5 | Engineering | Female | 36 | Analyst | 03:05.6 | 3 | Bachelor | 2 | 4800 | 38 | 11 | 29 | 13 | 100 | 15 | 9 | 1 | 1.25 | False |
In [9]:
Emp.shape
Out[9]:
(100000, 20)
In [10]:
Emp.describe()
Out[10]:
| Employee_ID | Age | Years_At_Company | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 |
| mean | 50000.500000 | 41.029410 | 4.476070 | 2.995430 | 6403.211000 | 44.956950 | 24.431170 | 14.514930 | 7.008550 | 50.090500 | 10.013560 | 49.506060 | 0.999720 | 2.999088 |
| std | 28867.657797 | 11.244121 | 2.869336 | 1.414726 | 1372.508717 | 8.942003 | 14.469584 | 8.664026 | 4.331591 | 35.351157 | 5.495405 | 28.890383 | 0.815872 | 1.150719 |
| min | 1.000000 | 22.000000 | 0.000000 | 1.000000 | 3850.000000 | 30.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 |
| 25% | 25000.750000 | 31.000000 | 2.000000 | 2.000000 | 5250.000000 | 37.000000 | 12.000000 | 7.000000 | 3.000000 | 25.000000 | 5.000000 | 25.000000 | 0.000000 | 2.010000 |
| 50% | 50000.500000 | 41.000000 | 4.000000 | 3.000000 | 6500.000000 | 45.000000 | 24.000000 | 15.000000 | 7.000000 | 50.000000 | 10.000000 | 49.000000 | 1.000000 | 3.000000 |
| 75% | 75000.250000 | 51.000000 | 7.000000 | 4.000000 | 7500.000000 | 53.000000 | 37.000000 | 22.000000 | 11.000000 | 75.000000 | 15.000000 | 75.000000 | 2.000000 | 3.990000 |
| max | 100000.000000 | 60.000000 | 10.000000 | 5.000000 | 9000.000000 | 60.000000 | 49.000000 | 29.000000 | 14.000000 | 100.000000 | 19.000000 | 99.000000 | 2.000000 | 5.000000 |
In [11]:
Emp.info
Out[11]:
<bound method DataFrame.info of Employee_ID Department Gender Age Job_Title Hire_Date \
0 1 IT Male 55 Specialist 03:05.6
1 2 Finance Male 29 Developer 03:05.6
2 3 Finance Male 55 Specialist 03:05.6
3 4 Customer Support Female 48 Analyst 03:05.6
4 5 Engineering Female 36 Analyst 03:05.6
... ... ... ... ... ... ...
99995 99996 Finance Male 27 Technician 03:05.6
99996 99997 IT Female 36 Consultant 03:05.6
99997 99998 Operations Male 53 Analyst 03:05.6
99998 99999 HR Female 22 Consultant 03:05.6
99999 100000 Finance Female 43 Analyst 03:05.6
Years_At_Company Education_Level Performance_Score Monthly_Salary \
0 2 High School 5 6750
1 0 High School 5 7500
2 8 High School 3 5850
3 7 Bachelor 2 4800
4 3 Bachelor 2 4800
... ... ... ... ...
99995 1 Bachelor 4 4900
99996 6 Master 5 8250
99997 8 High School 2 4800
99998 9 High School 5 8250
99999 0 PhD 1 4400
Work_Hours_Per_Week Projects_Handled Overtime_Hours Sick_Days \
0 33 32 22 2
1 34 34 13 14
2 37 27 6 3
3 52 10 28 12
4 38 11 29 13
... ... ... ... ...
99995 55 46 5 3
99996 39 35 7 0
99997 31 13 6 5
99998 35 43 10 1
99999 51 43 27 11
Remote_Work_Frequency Team_Size Training_Hours Promotions \
0 0 14 66 0
1 100 12 61 2
2 50 10 1 0
3 100 10 0 1
4 100 15 9 1
... ... ... ... ...
99995 75 16 48 2
99996 0 10 77 1
99997 0 5 87 1
99998 75 2 31 1
99999 75 13 45 1
Employee_Satisfaction_Score Resigned
0 2.63 False
1 1.72 False
2 3.17 False
3 1.86 False
4 1.25 False
... ... ...
99995 1.28 False
99996 3.48 True
99997 2.60 False
99998 3.10 False
99999 2.64 False
[100000 rows x 20 columns]>
In [12]:
sns.pairplot(Emp)
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_stats\counting.py:137: RuntimeWarning: Converting input from bool to <class 'numpy.uint8'> for compatibility.
bin_edges = np.histogram_bin_edges(vals, bins, binrange, weight)
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_stats\counting.py:176: RuntimeWarning: Converting input from bool to <class 'numpy.uint8'> for compatibility.
hist, edges = np.histogram(vals, **bin_kws, weights=weights, density=density)
Out[12]:
<seaborn.axisgrid.PairGrid at 0x2a3ca4c82d0>
In [13]:
Emp.head()
Out[13]:
| Employee_ID | Department | Gender | Age | Job_Title | Hire_Date | Years_At_Company | Education_Level | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | Resigned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | IT | Male | 55 | Specialist | 03:05.6 | 2 | High School | 5 | 6750 | 33 | 32 | 22 | 2 | 0 | 14 | 66 | 0 | 2.63 | False |
| 1 | 2 | Finance | Male | 29 | Developer | 03:05.6 | 0 | High School | 5 | 7500 | 34 | 34 | 13 | 14 | 100 | 12 | 61 | 2 | 1.72 | False |
| 2 | 3 | Finance | Male | 55 | Specialist | 03:05.6 | 8 | High School | 3 | 5850 | 37 | 27 | 6 | 3 | 50 | 10 | 1 | 0 | 3.17 | False |
| 3 | 4 | Customer Support | Female | 48 | Analyst | 03:05.6 | 7 | Bachelor | 2 | 4800 | 52 | 10 | 28 | 12 | 100 | 10 | 0 | 1 | 1.86 | False |
| 4 | 5 | Engineering | Female | 36 | Analyst | 03:05.6 | 3 | Bachelor | 2 | 4800 | 38 | 11 | 29 | 13 | 100 | 15 | 9 | 1 | 1.25 | False |
In [23]:
import seaborn as sns
import matplotlib.pyplot as plt
# Using countplot for categorical data
sns.countplot(x='Gender', data=Emp)
# Show the plot
plt.show()
In [24]:
# Count the number of males and females in the 'Gender' column
gender_counts = Emp['Gender'].value_counts()
# Display the counts
print(gender_counts)
Gender Male 48031 Female 48001 Other 3968 Name: count, dtype: int64
In [26]:
Emp.head()
Out[26]:
| Employee_ID | Department | Gender | Age | Job_Title | Hire_Date | Years_At_Company | Education_Level | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | Resigned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | IT | Male | 55 | Specialist | 03:05.6 | 2 | High School | 5 | 6750 | 33 | 32 | 22 | 2 | 0 | 14 | 66 | 0 | 2.63 | False |
| 1 | 2 | Finance | Male | 29 | Developer | 03:05.6 | 0 | High School | 5 | 7500 | 34 | 34 | 13 | 14 | 100 | 12 | 61 | 2 | 1.72 | False |
| 2 | 3 | Finance | Male | 55 | Specialist | 03:05.6 | 8 | High School | 3 | 5850 | 37 | 27 | 6 | 3 | 50 | 10 | 1 | 0 | 3.17 | False |
| 3 | 4 | Customer Support | Female | 48 | Analyst | 03:05.6 | 7 | Bachelor | 2 | 4800 | 52 | 10 | 28 | 12 | 100 | 10 | 0 | 1 | 1.86 | False |
| 4 | 5 | Engineering | Female | 36 | Analyst | 03:05.6 | 3 | Bachelor | 2 | 4800 | 38 | 11 | 29 | 13 | 100 | 15 | 9 | 1 | 1.25 | False |
In [29]:
# Group the data by 'Department' and count the number of employees in each department
Department = Emp.groupby('Department').size()
# Display the result
print(Department)
Department Customer Support 11116 Engineering 10956 Finance 11200 HR 10960 IT 11131 Legal 11118 Marketing 11216 Operations 11181 Sales 11122 dtype: int64
In [30]:
import matplotlib.pyplot as plt
# Department data (replace with your actual department count data)
department_count = {
'Customer Support': 11116,
'Engineering': 10956,
'Finance': 11200,
'HR': 10960,
'IT': 11131,
'Legal': 11118,
'Marketing': 11216,
'Operations': 11181,
'Sales': 11122
}
# Create a bar plot
plt.figure(figsize=(10,6))
plt.bar(department_count.keys(), department_count.values(), color='skyblue')
# Add labels and title
plt.xlabel('Department')
plt.ylabel('Number of Employees')
plt.title('Number of Employees in Each Department')
# Rotate x-axis labels for better visibility
plt.xticks(rotation=45)
# Show the plot
plt.tight_layout() # To prevent the labels from being cut off
plt.show()
In [31]:
pairplot_data = Emp[['Monthly_Salary', 'Work_Hours_Per_Week']]
# Create the pairplot
sns.pairplot(pairplot_data)
# Show the plot
plt.show()
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
In [32]:
Emp.head()
Out[32]:
| Employee_ID | Department | Gender | Age | Job_Title | Hire_Date | Years_At_Company | Education_Level | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | Resigned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | IT | Male | 55 | Specialist | 03:05.6 | 2 | High School | 5 | 6750 | 33 | 32 | 22 | 2 | 0 | 14 | 66 | 0 | 2.63 | False |
| 1 | 2 | Finance | Male | 29 | Developer | 03:05.6 | 0 | High School | 5 | 7500 | 34 | 34 | 13 | 14 | 100 | 12 | 61 | 2 | 1.72 | False |
| 2 | 3 | Finance | Male | 55 | Specialist | 03:05.6 | 8 | High School | 3 | 5850 | 37 | 27 | 6 | 3 | 50 | 10 | 1 | 0 | 3.17 | False |
| 3 | 4 | Customer Support | Female | 48 | Analyst | 03:05.6 | 7 | Bachelor | 2 | 4800 | 52 | 10 | 28 | 12 | 100 | 10 | 0 | 1 | 1.86 | False |
| 4 | 5 | Engineering | Female | 36 | Analyst | 03:05.6 | 3 | Bachelor | 2 | 4800 | 38 | 11 | 29 | 13 | 100 | 15 | 9 | 1 | 1.25 | False |
In [34]:
sns.displot(Emp['Job_Title'])
C:\Users\admin\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Out[34]:
<seaborn.axisgrid.FacetGrid at 0x2a3807ff6d0>
In [35]:
sns.heatmap(Emp.corr(numeric_only=True))
Out[35]:
<Axes: >
In [36]:
Emp.head()
Out[36]:
| Employee_ID | Department | Gender | Age | Job_Title | Hire_Date | Years_At_Company | Education_Level | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | Resigned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | IT | Male | 55 | Specialist | 03:05.6 | 2 | High School | 5 | 6750 | 33 | 32 | 22 | 2 | 0 | 14 | 66 | 0 | 2.63 | False |
| 1 | 2 | Finance | Male | 29 | Developer | 03:05.6 | 0 | High School | 5 | 7500 | 34 | 34 | 13 | 14 | 100 | 12 | 61 | 2 | 1.72 | False |
| 2 | 3 | Finance | Male | 55 | Specialist | 03:05.6 | 8 | High School | 3 | 5850 | 37 | 27 | 6 | 3 | 50 | 10 | 1 | 0 | 3.17 | False |
| 3 | 4 | Customer Support | Female | 48 | Analyst | 03:05.6 | 7 | Bachelor | 2 | 4800 | 52 | 10 | 28 | 12 | 100 | 10 | 0 | 1 | 1.86 | False |
| 4 | 5 | Engineering | Female | 36 | Analyst | 03:05.6 | 3 | Bachelor | 2 | 4800 | 38 | 11 | 29 | 13 | 100 | 15 | 9 | 1 | 1.25 | False |
In [37]:
plt.scatter(Emp['Age'],Emp['Gender'])
plt.xlable("Age")
plt.ylable("Gender")
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[37], line 2 1 plt.scatter(Emp['Age'],Emp['Gender']) ----> 2 plt.xlable("Age") 3 plt.ylable("Gender") AttributeError: module 'matplotlib.pyplot' has no attribute 'xlable'
In [38]:
plt.scatter(Emp['Department'],Emp['Monthly_Salary'])
plt.xlable("Department")
plt.ylable("Monthly_Salary")
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[38], line 2 1 plt.scatter(Emp['Department'],Emp['Monthly_Salary']) ----> 2 plt.xlable("Department") 3 plt.ylable("Monthly_Salary") AttributeError: module 'matplotlib.pyplot' has no attribute 'xlable'
In [39]:
import seaborn as sns
sns.regplot(x="Years_At_Company",y="Promotions",data=Emp)
Out[39]:
<Axes: xlabel='Years_At_Company', ylabel='Promotions'>
In [40]:
Emp.head()
Out[40]:
| Employee_ID | Department | Gender | Age | Job_Title | Hire_Date | Years_At_Company | Education_Level | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | Resigned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | IT | Male | 55 | Specialist | 03:05.6 | 2 | High School | 5 | 6750 | 33 | 32 | 22 | 2 | 0 | 14 | 66 | 0 | 2.63 | False |
| 1 | 2 | Finance | Male | 29 | Developer | 03:05.6 | 0 | High School | 5 | 7500 | 34 | 34 | 13 | 14 | 100 | 12 | 61 | 2 | 1.72 | False |
| 2 | 3 | Finance | Male | 55 | Specialist | 03:05.6 | 8 | High School | 3 | 5850 | 37 | 27 | 6 | 3 | 50 | 10 | 1 | 0 | 3.17 | False |
| 3 | 4 | Customer Support | Female | 48 | Analyst | 03:05.6 | 7 | Bachelor | 2 | 4800 | 52 | 10 | 28 | 12 | 100 | 10 | 0 | 1 | 1.86 | False |
| 4 | 5 | Engineering | Female | 36 | Analyst | 03:05.6 | 3 | Bachelor | 2 | 4800 | 38 | 11 | 29 | 13 | 100 | 15 | 9 | 1 | 1.25 | False |
In [42]:
Emp.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 100000 entries, 0 to 99999 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Employee_ID 100000 non-null int64 1 Department 100000 non-null object 2 Gender 100000 non-null object 3 Age 100000 non-null int64 4 Job_Title 100000 non-null object 5 Hire_Date 100000 non-null object 6 Years_At_Company 100000 non-null int64 7 Education_Level 100000 non-null object 8 Performance_Score 100000 non-null int64 9 Monthly_Salary 100000 non-null int64 10 Work_Hours_Per_Week 100000 non-null int64 11 Projects_Handled 100000 non-null int64 12 Overtime_Hours 100000 non-null int64 13 Sick_Days 100000 non-null int64 14 Remote_Work_Frequency 100000 non-null int64 15 Team_Size 100000 non-null int64 16 Training_Hours 100000 non-null int64 17 Promotions 100000 non-null int64 18 Employee_Satisfaction_Score 100000 non-null float64 19 Resigned 100000 non-null bool dtypes: bool(1), float64(1), int64(13), object(5) memory usage: 14.6+ MB
In [44]:
## Sumarizing the stats of the data
Emp.describe()
Out[44]:
| Employee_ID | Age | Years_At_Company | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 |
| mean | 50000.500000 | 41.029410 | 4.476070 | 2.995430 | 6403.211000 | 44.956950 | 24.431170 | 14.514930 | 7.008550 | 50.090500 | 10.013560 | 49.506060 | 0.999720 | 2.999088 |
| std | 28867.657797 | 11.244121 | 2.869336 | 1.414726 | 1372.508717 | 8.942003 | 14.469584 | 8.664026 | 4.331591 | 35.351157 | 5.495405 | 28.890383 | 0.815872 | 1.150719 |
| min | 1.000000 | 22.000000 | 0.000000 | 1.000000 | 3850.000000 | 30.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 |
| 25% | 25000.750000 | 31.000000 | 2.000000 | 2.000000 | 5250.000000 | 37.000000 | 12.000000 | 7.000000 | 3.000000 | 25.000000 | 5.000000 | 25.000000 | 0.000000 | 2.010000 |
| 50% | 50000.500000 | 41.000000 | 4.000000 | 3.000000 | 6500.000000 | 45.000000 | 24.000000 | 15.000000 | 7.000000 | 50.000000 | 10.000000 | 49.000000 | 1.000000 | 3.000000 |
| 75% | 75000.250000 | 51.000000 | 7.000000 | 4.000000 | 7500.000000 | 53.000000 | 37.000000 | 22.000000 | 11.000000 | 75.000000 | 15.000000 | 75.000000 | 2.000000 | 3.990000 |
| max | 100000.000000 | 60.000000 | 10.000000 | 5.000000 | 9000.000000 | 60.000000 | 49.000000 | 29.000000 | 14.000000 | 100.000000 | 19.000000 | 99.000000 | 2.000000 | 5.000000 |
In [48]:
## Check the missing values.
Emp.isnull()
Out[48]:
| Employee_ID | Department | Gender | Age | Job_Title | Hire_Date | Years_At_Company | Education_Level | Performance_Score | Monthly_Salary | Work_Hours_Per_Week | Projects_Handled | Overtime_Hours | Sick_Days | Remote_Work_Frequency | Team_Size | Training_Hours | Promotions | Employee_Satisfaction_Score | Resigned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 1 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 4 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99995 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 99996 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 99997 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 99998 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 99999 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
100000 rows × 20 columns
In [47]:
Emp.isnull().sum()
Out[47]:
Employee_ID 0 Department 0 Gender 0 Age 0 Job_Title 0 Hire_Date 0 Years_At_Company 0 Education_Level 0 Performance_Score 0 Monthly_Salary 0 Work_Hours_Per_Week 0 Projects_Handled 0 Overtime_Hours 0 Sick_Days 0 Remote_Work_Frequency 0 Team_Size 0 Training_Hours 0 Promotions 0 Employee_Satisfaction_Score 0 Resigned 0 dtype: int64
In [ ]:
## Exploratory Data Analysis
## Correlation
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: